This document demonstrates the use of the bRF and LASSO-D3S functions for integrative GRN inference.

Those functions infer the regulatory pathways of Arabidopsis thaliana’s roots in response to nitrate (N) induction from Varala et al., 2018.

They use as inputs the expression profiles of N-responsive genes and TFBS information. Prior TFBS information was built by searching in the promoters of the N-responsive genes the PWM of the N-responsive regulators.

Data import

Import of the expression data and the N-responsive genes and regulators :

load('rdata/inference_input_N_response_varala.rdata')
genes <- input_data$grouped_genes; length(genes)
## [1] 1426
tfs <- input_data$grouped_regressors; length(tfs)
## [1] 201
counts <- input_data$counts; dim(counts)
## [1] 1426   45
load("rdata/pwm_occurrences_N_response_varala.rdata")
dim(pwm_occurrence)
## [1] 1426  201
ALPHAS <- seq(0,1, by = 0.1)

# subset <- sample(genes, replace = F, size = 20)
subset <- genes

Lauching the permutations and the true data inferences 100 times

nCores = 45
mats <- list()
nrep <- 100
for(alpha in ALPHAS){ # exploring PWM integration strength
  for(rep in 1:nrep){ # exploring inherent variability
    
    mat_rf <- bRF_inference(counts, genes, tfs, nTrees = 2000,
                            alpha = alpha,
                            pwm_occurrence = pwm_occurrence,
                            nCores = nCores,
                            importance = "%IncMSE")
    
    mat_rf_perm <- bRF_inference(counts, genes, tfs, nTrees = 2000,
                            alpha = alpha, tf_expression_permutation = TRUE,
                            pwm_occurrence = pwm_occurrence,
                            nCores = nCores,
                            importance = "%IncMSE")
    
    # mat_lasso <- LASSO.D3S_inferenceMDA(counts, genes, tfs,
    #                                  alpha = alpha, N = 100,
    #                                  int_pwm_noise = 0, mda_type = "zero",
    #                                  pwm_occurrence = pwm_occurrence,
    #                                  nCores = nCores)
    
    mats[[paste0("bRF_", as.character(alpha),  '_trueData_', rep)]] <- mat_rf
    mats[[paste0("bRF_", as.character(alpha),  '_shuffled_', rep)]] <- mat_rf_perm
    
  }
}
save(mats, file = "results/100_permutations_bRF_importances.rdata")

Thresholding the networks

load( "results/100_permutations_bRF_importances.rdata")
edges <- list()
densities <- c(0.005, 0.01,0.05,0.075)

for(name in names(mats)){ # exploring importance threshold stringency
      for(density in densities){
        edges[[paste0(name, '_', density)]] <-
        bRF_network(mats[[name]], density = density, pwm_occurrence, genes, tfs)
      }
}

save(edges, file = "results/100_permutations_bRF_edges.rdata")

names(edges)
# compure precision and recall
load("results/100_permutations_bRF_edges.rdata")

color_palette = c("#C12131", "#EC5D2F", "#FE945C", "#FFC08E" )
prettyZero <- function(l){
    max.decimals = max(nchar(str_extract(l, "\\.[0-9]+")), na.rm = T)-1
    lnew = formatC(l, replace.zero = T, zero.print = "0",
        digits = max.decimals, format = "f", preserve.width=T)
    return(lnew)
}

settings <- c("method", "alpha", "dataset","rep", "density")

PWM support

# number of edges per network
nrows <-
  data.frame(alpha_rep = names(unlist(lapply(edges, FUN = nrow))),
             n_edges = unlist(lapply(edges, FUN = nrow)))

nrows[, settings] <-
  str_split_fixed(nrows$alpha_rep, '_', length(settings))

edges_num <- lapply(edges, function(df)
  df[sapply(df, is.numeric)])

pwm_support <-
  data.frame(alpha_rep = names(unlist(lapply(edges_num, FUN = nrow))),
             pwm = unlist(lapply(edges_num, FUN = colMeans)))
pwm_support[, settings] <-
  str_split_fixed(pwm_support$alpha_rep, '_', length(settings))

pwm_support %>%
  left_join(nrows, by = settings) %>%
  mutate(alpha = as.numeric(alpha),
         density_label = paste(density, ':', n_edges, 'edges')) %>%
  ggplot(aes(color = density_label, x = alpha, y = pwm)) +
  ggh4x::facet_nested_wrap(vars(method, density), ncol = 8, nest_line = T) +
  geom_point() +
  geom_smooth(aes(fill = density_label, linetype =dataset) , alpha = 0.1) +
  theme(
    strip.background = element_blank(),
    axis.title.x = element_text(size = 22),
    title = element_text(size = 16),
    strip.text = element_text(size = 16),
    legend.position = "top",
    legend.text = element_text(size = 15),
    axis.text = element_text(size = 12)
  ) +
  xlab(expression(alpha)) + ylab("Mean PWM score in selected edges") +
  ggtitle("Average PWM support of inferred edges") +
  guides(color = guide_legend(nrow = 2, byrow = TRUE),
         fill = guide_legend(nrow = 2, byrow = TRUE)) +
  ylab(expression(paste("mean(", pi[tr], ")"))) +
  scale_x_continuous(labels = prettyZero) +
  scale_color_manual(name = "Network density", values = color_palette) +
  scale_fill_manual(name = "Network density", values = color_palette)

## Computing validation metrics

val_conn <-
  evaluate_networks(
    edges,
    validation = c("TARGET", "CHIPSeq", "DAPSeq"),
    nCores = 15,
    input_genes = genes,
    input_tfs = tfs
  )

val_conn[, settings] <-
  str_split_fixed(val_conn$network_name, '_', length(settings))

save(val_conn, file = "results/100_permutations_bRF_validation.rdata")


val_chip <-
  evaluate_networks(
    edges,
    validation = c("CHIPSeq"),
    nCores = 15,
    input_genes = genes,
    input_tfs = tfs
  )

val_chip[, settings] <-
  str_split_fixed(val_chip$network_name, '_', length(settings))
save(val_chip, file = "results/100_permutations_bRF_validation_chip.rdata")

val_target <-
  evaluate_networks(
    edges,
    validation = c("TARGET"),
    nCores = 15,
    input_genes = genes,
    input_tfs = tfs
  )

val_target[, settings] <-
  str_split_fixed(val_target$network_name, '_', length(settings))
save(val_target, file = "results/100_permutations_bRF_validation_target.rdata")


val_dap <-
  evaluate_networks(
    edges,
    validation = c("DAPSeq"),
    nCores = 15,
    input_genes = genes,
    input_tfs = tfs
  )

val_dap[, settings] <-
  str_split_fixed(val_dap$network_name, '_', length(settings))
save(val_dap, file = "results/100_permutations_bRF_validation_dap.rdata")

Plotting validation

ConnecTF

load("results/100_permutations_bRF_validation.rdata")
data_val <- val_conn %>%
    group_by(alpha, dataset, density) %>%
    mutate(mean_precision = mean(precision, na.rm = T),
           sd_precision = sd(precision, na.rm = T),
           mean_recall = mean(recall, na.rm = T),
           sd_recall = sd(recall, na.rm = T)) %>%
  dplyr::select(-network_name) %>%
  left_join(nrows, by = settings) %>%
  mutate(alpha = as.numeric(alpha)) %>%
  mutate(density_label = paste(density, ':', n_edges, 'edges')) 

precision_all <- data_val %>%
    ggplot(aes(
      x = as.numeric(alpha),
      y = precision,
      color = dataset,
      fill = dataset
    ))+ylab("Precision")+
    ggh4x::facet_nested_wrap(vars(method, density_label), ncol = 8, nest_line = T) + 
    geom_ribbon(aes(ymin = mean_precision - sd_precision , 
                    ymax = mean_precision + sd_precision  ), 
                alpha = .4)  +theme_pubr(legend = "top")+
    geom_point(alpha = 0.1) + geom_smooth(se=F)+xlab("alpha") +
  xlab(expression(alpha)) + ylab("Precision") +
  ggtitle("Precision against ConnecTF") +
  theme(
    strip.background = element_blank(),
    axis.title.x = element_text(size = 22),
    title = element_text(size = 16),
    strip.text = element_text(size = 16),
    legend.text = element_text(size = 15),
    axis.text = element_text(size = 12),
    legend.position = 'top'
  );precision_all

recall_all <- data_val%>%
    ggplot(aes(
      x = as.numeric(alpha),
      y = recall,
      color = dataset,
      fill = dataset
    ))+
    ggh4x::facet_nested_wrap(vars(method, density_label), ncol = 8, 
                             nest_line = T, scales = "free") + 
    geom_ribbon(aes(ymin = mean_recall - sd_recall , 
                    ymax = mean_recall + sd_recall  ), 
                alpha = .4)  +theme_pubr(legend = "none")+
    geom_point(alpha = 0.1) + geom_smooth(se=F)+xlab("alpha") +
  xlab(expression(alpha)) + ylab("Recall") +
  ggtitle("Recall against ConnecTF") +
  theme(
    strip.background = element_blank(),
    axis.title.x = element_text(size = 22),
    title = element_text(size = 16),
    strip.text = element_text(size = 16),
    legend.text = element_text(size = 15),
    axis.text = element_text(size = 12),
    legend.position = 'top'
  );recall_all

CHIP Seq only

load("results/100_permutations_bRF_validation_chip.rdata")
data_val <- val_chip %>%
    group_by(alpha, dataset, density) %>%
    mutate(mean_precision = mean(precision, na.rm = T),
           sd_precision = sd(precision, na.rm = T),
           mean_recall = mean(recall, na.rm = T),
           sd_recall = sd(recall, na.rm = T)) %>%
  dplyr::select(-network_name) %>%
  left_join(nrows, by = settings) %>%
  mutate(alpha = as.numeric(alpha)) %>%
  mutate(density_label = paste(density, ':', n_edges, 'edges')) 

data_val %>%
    ggplot(aes(
      x = as.numeric(alpha),
      y = precision,
      color = dataset,
      fill = dataset
    ))+ylab("Precision")+
    ggh4x::facet_nested_wrap(vars(method, density_label), ncol = 8, nest_line = T) + 
    geom_ribbon(aes(ymin = mean_precision - sd_precision , 
                    ymax = mean_precision + sd_precision  ), 
                alpha = .4)  +theme_pubr(legend = "top")+
    geom_point(alpha = 0.1) + geom_smooth(se=F)+xlab("alpha") +
  xlab(expression(alpha)) + ylab("Precision") +
  ggtitle("Precision against CHIP") +
  theme(
    strip.background = element_blank(),
    axis.title.x = element_text(size = 22),
    title = element_text(size = 16),
    strip.text = element_text(size = 16),
    legend.text = element_text(size = 15),
    axis.text = element_text(size = 12),
    legend.position = 'top'
  )

data_val%>%
    ggplot(aes(
      x = as.numeric(alpha),
      y = recall,
      color = dataset,
      fill = dataset
    ))+
    ggh4x::facet_nested_wrap(vars(method, density_label), ncol = 8, 
                             nest_line = T, scales = "free") + 
    geom_ribbon(aes(ymin = mean_recall - sd_recall , 
                    ymax = mean_recall + sd_recall  ), 
                alpha = .4)  +theme_pubr(legend = "none")+
    geom_point(alpha = 0.1) + geom_smooth(se=F)+xlab("alpha") +
  xlab(expression(alpha)) + ylab("Recall") +
  ggtitle("Recall against CHIP") +
  theme(
    strip.background = element_blank(),
    axis.title.x = element_text(size = 22),
    title = element_text(size = 16),
    strip.text = element_text(size = 16),
    legend.text = element_text(size = 15),
    axis.text = element_text(size = 12),
    legend.position = 'top'
  )

TARGET only

load("results/100_permutations_bRF_validation_target.rdata")
data_val <- val_target %>%
    group_by(alpha, dataset, density) %>%
    mutate(mean_precision = mean(precision, na.rm = T),
           sd_precision = sd(precision, na.rm = T),
           mean_recall = mean(recall, na.rm = T),
           sd_recall = sd(recall, na.rm = T)) %>%
  dplyr::select(-network_name) %>%
  left_join(nrows, by = settings) %>%
  mutate(alpha = as.numeric(alpha)) %>%
  mutate(density_label = paste(density, ':', n_edges, 'edges')) 

data_val %>%
    ggplot(aes(
      x = as.numeric(alpha),
      y = precision,
      color = dataset,
      fill = dataset
    ))+ylab("Precision")+
    ggh4x::facet_nested_wrap(vars(method, density_label), ncol = 8, nest_line = T) + 
    geom_ribbon(aes(ymin = mean_precision - sd_precision , 
                    ymax = mean_precision + sd_precision  ), 
                alpha = .4)  +theme_pubr(legend = "top")+
    geom_point(alpha = 0.1) + geom_smooth(se=F)+xlab("alpha") +
  xlab(expression(alpha)) + ylab("Precision") +
  ggtitle("Precision against TARGET") +
  theme(
    strip.background = element_blank(),
    axis.title.x = element_text(size = 22),
    title = element_text(size = 16),
    strip.text = element_text(size = 16),
    legend.text = element_text(size = 15),
    axis.text = element_text(size = 12),
    legend.position = 'top'
  )

data_val%>%
    ggplot(aes(
      x = as.numeric(alpha),
      y = recall,
      color = dataset,
      fill = dataset
    ))+
    ggh4x::facet_nested_wrap(vars(method, density_label), ncol = 8, 
                             nest_line = T, scales = "free") + 
    geom_ribbon(aes(ymin = mean_recall - sd_recall , 
                    ymax = mean_recall + sd_recall  ), 
                alpha = .4)  +theme_pubr(legend = "none")+
    geom_point(alpha = 0.1) + geom_smooth(se=F)+xlab("alpha") +
  xlab(expression(alpha)) + ylab("Recall") +
  ggtitle("Recall against TARGET") +
  theme(
    strip.background = element_blank(),
    axis.title.x = element_text(size = 22),
    title = element_text(size = 16),
    strip.text = element_text(size = 16),
    legend.text = element_text(size = 15),
    axis.text = element_text(size = 12),
    legend.position = 'top'
  )

DAP Seq only

load("results/100_permutations_bRF_validation_dap.rdata")
data_val <- val_dap %>%
    group_by(alpha, dataset, density) %>%
    mutate(mean_precision = mean(precision, na.rm = T),
           sd_precision = sd(precision, na.rm = T),
           mean_recall = mean(recall, na.rm = T),
           sd_recall = sd(recall, na.rm = T)) %>%
  dplyr::select(-network_name) %>%
  left_join(nrows, by = settings) %>%
  mutate(alpha = as.numeric(alpha)) %>%
  mutate(density_label = paste(density, ':', n_edges, 'edges')) 

data_val %>%
    ggplot(aes(
      x = as.numeric(alpha),
      y = precision,
      color = dataset,
      fill = dataset
    ))+ylab("Precision")+
    ggh4x::facet_nested_wrap(vars(method, density_label), ncol = 8, nest_line = T) + 
    geom_ribbon(aes(ymin = mean_precision - sd_precision , 
                    ymax = mean_precision + sd_precision  ), 
                alpha = .4)  +theme_pubr(legend = "top")+
    geom_point(alpha = 0.1) + geom_smooth(se=F)+xlab("alpha") +
  xlab(expression(alpha)) + ylab("Precision") +
  ggtitle("Precision against DAP") +
  theme(
    strip.background = element_blank(),
    axis.title.x = element_text(size = 22),
    title = element_text(size = 16),
    strip.text = element_text(size = 16),
    legend.text = element_text(size = 15),
    axis.text = element_text(size = 12),
    legend.position = 'top'
  )

data_val%>%
    ggplot(aes(
      x = as.numeric(alpha),
      y = recall,
      color = dataset,
      fill = dataset
    ))+
    ggh4x::facet_nested_wrap(vars(method, density_label), ncol = 8, 
                             nest_line = T, scales = "free") + 
    geom_ribbon(aes(ymin = mean_recall - sd_recall , 
                    ymax = mean_recall + sd_recall  ), 
                alpha = .4)  +theme_pubr(legend = "none")+
    geom_point(alpha = 0.1) + geom_smooth(se=F)+xlab("alpha") +
  xlab(expression(alpha)) + ylab("Recall") +
  ggtitle("Recall against DAP") +
  theme(
    strip.background = element_blank(),
    axis.title.x = element_text(size = 22),
    title = element_text(size = 16),
    strip.text = element_text(size = 16),
    legend.text = element_text(size = 15),
    axis.text = element_text(size = 12),
    legend.position = 'top'
  )

How are those curves for genes from the cluster of genes with a MSE decreased significantly by data integration?

load("results/clusters_mse_bRF_100permutations.rdata")
positive_genes <- names(clusters_rf[clusters_rf==2])
positive_nets <- lapply(edges, function(net){net[net$to %in% positive_genes,]})
#frees up RAM
rm(edges)

val_conn_pos_true <-
  evaluate_networks(
    positive_nets[names(positive_nets)[str_detect(names(positive_nets), "trueData")]],
    validation = c("TARGET", "CHIPSeq", "DAPSeq"),
    nCores = 10,
    input_genes = genes,
    input_tfs = tfs
  )

val_conn_pos_shuffled <-
  evaluate_networks(
    positive_nets[names(positive_nets)[!str_detect(names(positive_nets), "trueData")]],
    validation = c("TARGET", "CHIPSeq", "DAPSeq"),
    nCores = 10,
    input_genes = genes,
    input_tfs = tfs
  )

val_conn_pos <- rbind.data.frame(val_conn_pos_true, val_conn_pos_shuffled)
val_conn_pos[, settings] <-
  str_split_fixed(val_conn_pos$network_name, '_', length(settings))
save(val_conn_pos, file = "results/100_permutations_bRF_validation_cluster_pos.rdata")



val_chip <-
  evaluate_networks(
    positive_nets,
    validation = c("CHIPSeq"),
    nCores = 10,
    input_genes = genes,
    input_tfs = tfs
  )

val_chip[, settings] <-
  str_split_fixed(val_chip$network_name, '_', length(settings))
save(val_chip, file = "results/100_permutations_bRF_validation_chip_cluster_pos.rdata")


val_target <-
  evaluate_networks(
    positive_nets,
    validation = c("TARGET"),
    nCores = 10,
    input_genes = genes,
    input_tfs = tfs
  )

val_target[, settings] <-
  str_split_fixed(val_target$network_name, '_', length(settings))
save(val_target, file = "results/100_permutations_bRF_validation_target_cluster_pos.rdata")


val_dap <-
  evaluate_networks(
    positive_nets,
    validation = c("DAPSeq"),
    nCores = 10,
    input_genes = genes,
    input_tfs = tfs
  )

val_dap[, settings] <-
  str_split_fixed(val_dap$network_name, '_', length(settings))
save(val_dap, file = "results/100_permutations_bRF_validation_dap_cluster_pos.rdata")

ConnecTF

load(file = "results/100_permutations_bRF_validation_cluster_pos.rdata")
data_val <- val_conn_pos %>%
    group_by(alpha, dataset, density) %>%
    mutate(mean_precision = mean(precision, na.rm = T),
           sd_precision = sd(precision, na.rm = T),
           mean_recall = mean(recall, na.rm = T),
           sd_recall = sd(recall, na.rm = T)) %>%
  dplyr::select(-network_name) %>%
  left_join(nrows, by = settings) %>%
  mutate(alpha = as.numeric(alpha)) %>%
  mutate(density_label = paste(density, ':', n_edges, 'edges')) 

data_val %>%
    ggplot(aes(
      x = as.numeric(alpha),
      y = precision,
      color = dataset,
      fill = dataset
    ))+ylab("Precision")+
    ggh4x::facet_nested_wrap(vars(method, density_label), ncol = 8, nest_line = T) + 
    geom_ribbon(aes(ymin = mean_precision - sd_precision , 
                    ymax = mean_precision + sd_precision  ), 
                alpha = .4)  +theme_pubr(legend = "top")+
    geom_point(alpha = 0.1) + geom_smooth(se=F)+xlab("alpha") +
  xlab(expression(alpha)) + ylab("Precision") +
  ggtitle("Precision against ConnecTF, positive genes") +
  theme(
    strip.background = element_blank(),
    axis.title.x = element_text(size = 22),
    title = element_text(size = 16),
    strip.text = element_text(size = 16),
    legend.text = element_text(size = 15),
    axis.text = element_text(size = 12),
    legend.position = 'top'
  )

precision_all

data_val%>%
    ggplot(aes(
      x = as.numeric(alpha),
      y = recall,
      color = dataset,
      fill = dataset
    ))+
    ggh4x::facet_nested_wrap(vars(method, density_label), ncol = 8, 
                             nest_line = T, scales = "free") + 
    geom_ribbon(aes(ymin = mean_recall - sd_recall , 
                    ymax = mean_recall + sd_recall  ), 
                alpha = .4)  +theme_pubr(legend = "none")+
    geom_point(alpha = 0.1) + geom_smooth(se=F)+xlab("alpha") +
  xlab(expression(alpha)) + ylab("Recall") +
  ggtitle("Recall against ConnecTF, postitive genes") +
  theme(
    strip.background = element_blank(),
    axis.title.x = element_text(size = 22),
    title = element_text(size = 16),
    strip.text = element_text(size = 16),
    legend.text = element_text(size = 15),
    axis.text = element_text(size = 12),
    legend.position = 'top'
  ) 

recall_all

CHIP only

load(file = "results/100_permutations_bRF_validation_chip_cluster_pos.rdata")
data_val <- val_chip %>%
    group_by(alpha, dataset, density) %>%
    mutate(mean_precision = mean(precision, na.rm = T),
           sd_precision = sd(precision, na.rm = T),
           mean_recall = mean(recall, na.rm = T),
           sd_recall = sd(recall, na.rm = T)) %>%
  dplyr::select(-network_name) %>%
  left_join(nrows, by = settings) %>%
  mutate(alpha = as.numeric(alpha)) %>%
  mutate(density_label = paste(density, ':', n_edges, 'edges')) 

data_val %>%
    ggplot(aes(
      x = as.numeric(alpha),
      y = precision,
      color = dataset,
      fill = dataset
    ))+ylab("Precision")+
    ggh4x::facet_nested_wrap(vars(method, density_label), ncol = 8, nest_line = T) + 
    geom_ribbon(aes(ymin = mean_precision - sd_precision , 
                    ymax = mean_precision + sd_precision  ), 
                alpha = .4)  +theme_pubr(legend = "top")+
    geom_point(alpha = 0.1) + geom_smooth(se=F)+xlab("alpha") +
  xlab(expression(alpha)) + ylab("Precision") +
  ggtitle("Precision against CHIPSeq, positive genes") +
  theme(
    strip.background = element_blank(),
    axis.title.x = element_text(size = 22),
    title = element_text(size = 16),
    strip.text = element_text(size = 16),
    legend.text = element_text(size = 15),
    axis.text = element_text(size = 12),
    legend.position = 'top'
  )

data_val%>%
    ggplot(aes(
      x = as.numeric(alpha),
      y = recall,
      color = dataset,
      fill = dataset
    ))+
    ggh4x::facet_nested_wrap(vars(method, density_label), ncol = 8, 
                             nest_line = T, scales = "free") + 
    geom_ribbon(aes(ymin = mean_recall - sd_recall , 
                    ymax = mean_recall + sd_recall  ), 
                alpha = .4)  +theme_pubr(legend = "none")+
    geom_point(alpha = 0.1) + geom_smooth(se=F)+xlab("alpha") +
  xlab(expression(alpha)) + ylab("Recall") +
  ggtitle("Recall against CHIPSeq, postitive genes") +
  theme(
    strip.background = element_blank(),
    axis.title.x = element_text(size = 22),
    title = element_text(size = 16),
    strip.text = element_text(size = 16),
    legend.text = element_text(size = 15),
    axis.text = element_text(size = 12),
    legend.position = 'top'
  ) 

TARGET only

load(file = "results/100_permutations_bRF_validation_target_cluster_pos.rdata")
data_val <- val_target %>%
    group_by(alpha, dataset, density) %>%
    mutate(mean_precision = mean(precision, na.rm = T),
           sd_precision = sd(precision, na.rm = T),
           mean_recall = mean(recall, na.rm = T),
           sd_recall = sd(recall, na.rm = T)) %>%
  dplyr::select(-network_name) %>%
  left_join(nrows, by = settings) %>%
  mutate(alpha = as.numeric(alpha)) %>%
  mutate(density_label = paste(density, ':', n_edges, 'edges')) 

data_val %>%
    ggplot(aes(
      x = as.numeric(alpha),
      y = precision,
      color = dataset,
      fill = dataset
    ))+ylab("Precision")+
    ggh4x::facet_nested_wrap(vars(method, density_label), ncol = 8, nest_line = T) + 
    geom_ribbon(aes(ymin = mean_precision - sd_precision , 
                    ymax = mean_precision + sd_precision  ), 
                alpha = .4)  +theme_pubr(legend = "top")+
    geom_point(alpha = 0.1) + geom_smooth(se=F)+xlab("alpha") +
  xlab(expression(alpha)) + ylab("Precision") +
  ggtitle("Precision against TARGET, positive genes") +
  theme(
    strip.background = element_blank(),
    axis.title.x = element_text(size = 22),
    title = element_text(size = 16),
    strip.text = element_text(size = 16),
    legend.text = element_text(size = 15),
    axis.text = element_text(size = 12),
    legend.position = 'top'
  )

data_val%>%
    ggplot(aes(
      x = as.numeric(alpha),
      y = recall,
      color = dataset,
      fill = dataset
    ))+
    ggh4x::facet_nested_wrap(vars(method, density_label), ncol = 8, 
                             nest_line = T, scales = "free") + 
    geom_ribbon(aes(ymin = mean_recall - sd_recall , 
                    ymax = mean_recall + sd_recall  ), 
                alpha = .4)  +theme_pubr(legend = "none")+
    geom_point(alpha = 0.1) + geom_smooth(se=F)+xlab("alpha") +
  xlab(expression(alpha)) + ylab("Recall") +
  ggtitle("Recall against TARGET, postitive genes") +
  theme(
    strip.background = element_blank(),
    axis.title.x = element_text(size = 22),
    title = element_text(size = 16),
    strip.text = element_text(size = 16),
    legend.text = element_text(size = 15),
    axis.text = element_text(size = 12),
    legend.position = 'top'
  ) 

DAP only

load(file = "results/100_permutations_bRF_validation_dap_cluster_pos.rdata")
data_val <- val_dap %>%
    group_by(alpha, dataset, density) %>%
    mutate(mean_precision = mean(precision, na.rm = T),
           sd_precision = sd(precision, na.rm = T),
           mean_recall = mean(recall, na.rm = T),
           sd_recall = sd(recall, na.rm = T)) %>%
  dplyr::select(-network_name) %>%
  left_join(nrows, by = settings) %>%
  mutate(alpha = as.numeric(alpha)) %>%
  mutate(density_label = paste(density, ':', n_edges, 'edges')) 

data_val %>%
    ggplot(aes(
      x = as.numeric(alpha),
      y = precision,
      color = dataset,
      fill = dataset
    ))+ylab("Precision")+
    ggh4x::facet_nested_wrap(vars(method, density_label), ncol = 8, nest_line = T) + 
    geom_ribbon(aes(ymin = mean_precision - sd_precision , 
                    ymax = mean_precision + sd_precision  ), 
                alpha = .4)  +theme_pubr(legend = "top")+
    geom_point(alpha = 0.1) + geom_smooth(se=F)+xlab("alpha") +
  xlab(expression(alpha)) + ylab("Precision") +
  ggtitle("Precision against DAPSeq, positive genes") +
  theme(
    strip.background = element_blank(),
    axis.title.x = element_text(size = 22),
    title = element_text(size = 16),
    strip.text = element_text(size = 16),
    legend.text = element_text(size = 15),
    axis.text = element_text(size = 12),
    legend.position = 'top'
  )

data_val%>%
    ggplot(aes(
      x = as.numeric(alpha),
      y = recall,
      color = dataset,
      fill = dataset
    ))+
    ggh4x::facet_nested_wrap(vars(method, density_label), ncol = 8, 
                             nest_line = T, scales = "free") + 
    geom_ribbon(aes(ymin = mean_recall - sd_recall , 
                    ymax = mean_recall + sd_recall  ), 
                alpha = .4)  +theme_pubr(legend = "none")+
    geom_point(alpha = 0.1) + geom_smooth(se=F)+xlab("alpha") +
  xlab(expression(alpha)) + ylab("Recall") +
  ggtitle("Recall against DAPSeq, postitive genes") +
  theme(
    strip.background = element_blank(),
    axis.title.x = element_text(size = 22),
    title = element_text(size = 16),
    strip.text = element_text(size = 16),
    legend.text = element_text(size = 15),
    axis.text = element_text(size = 12),
    legend.position = 'top'
  )